Total factor productivity (TFP) is a measure of productivity calculated by dividing economy-wide total production by the weighted average of inputs, i.e. labor and capital. It represents growth in real output that is in excess of the growth in inputs such as labor and capital.
Importing:
# Read the TFP dataset from its CSV file into a data frame.
df <- read.table(
  file = 'data/TFP.csv',
  header = TRUE,
  sep = ',',
  dec = '.'
)
Examine basic dataset information:
# Preview the first six rows of the data frame.
head(df, n = 6)
Basic exploration:
Variables initial visualization:
# First column: the country codes present in the data.
unique(df[["isocode"]])
[1] "USA" "CAN" "MEX"
# Second column: the years covered by the panel.
unique(df[["year"]])
[1] 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975
[27] 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001
[53] 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
# Third column: the TFP values themselves.
unique(df[["rtfpna"]])
[1] 0.6171479 0.6295884 0.6384513 0.6518582 0.6461794 0.6687729 0.6609999 0.6621648 0.6548125 0.6806795 0.6781228 0.6806923 0.7000680
[14] 0.7090361 0.7232546 0.7401488 0.7561739 0.7483884 0.7585539 0.7544527 0.7398825 0.7519034 0.7642947 0.7768553 0.7502252 0.7439113
[27] 0.7571526 0.7623404 0.7705071 0.7677994 0.7530057 0.7591725 0.7426290 0.7630540 0.7880087 0.8002188 0.8062083 0.8089371 0.8211952
[40] 0.8304957 0.8322658 0.8280669 0.8461575 0.8543829 0.8667796 0.8695626 0.8842039 0.9001899 0.9189172 0.9413751 0.9594316 0.9578253
[53] 0.9669166 0.9759099 0.9920577 1.0000000 1.0040003 1.0065953 0.9984993 0.9867378 1.0094631 1.0203918 0.8433348 0.8551230 0.8964893
[66] 0.9106582 0.8751778 0.9276822 0.9798690 0.9697281 0.9599284 0.9742050 0.9312786 0.9287079 0.9567108 0.9730290 0.9932155 1.0099076
[79] 1.0163369 1.0032632 1.0223480 1.0324813 1.0338035 1.0374722 1.0507104 1.0653431 1.0532701 1.0384669 1.0364398 1.0381832 1.0352786
[92] 1.0218114 1.0035212 1.0036830 0.9832259 0.9934451 1.0219427 1.0362533 1.0207436 1.0235457 1.0308436 1.0222874 1.0007789 0.9740902
[105] 0.9764386 0.9845813 1.0047389 1.0084209 1.0057704 1.0173663 1.0255228 1.0469565 1.0661042 1.0503540 1.0394745 1.0165167 1.0086925
[118] 0.9951318 0.9814328 0.9583570 0.9264822 0.9325666 0.9294054 0.7981593 0.8609314 0.8769677 0.8552045 0.9291679 0.9818486 1.0271603
[131] 1.0740376 1.0915502 1.0855639 1.1482444 1.1298157 1.1326360 1.1914270 1.2908802 1.2937292 1.3025805 1.3068670 1.3471068 1.3312742
[144] 1.3517636 1.3329470 1.3607897 1.3837184 1.3739711 1.3531204 1.3270818 1.3015443 1.3257377 1.3461033 1.3495865 1.3662801 1.2887375
[157] 1.2136229 1.2130910 1.1946880 1.1141175 1.0997505 1.0755730 1.0845842 1.0988975 1.1010851 1.0961676 1.0781175 1.0766171 1.0050045
[170] 1.0232743 1.0445235 1.0514060 1.0501482 1.0727754 1.0367354 1.0050328 0.9912722 0.9949627 1.0117506 1.0136733 0.9915478 0.9158785
[183] 0.9416037 0.9557657
Note:
View of data formats:
# Compact display of the structure and type of each column.
str(object = df)
'data.frame': 186 obs. of 3 variables:
$ isocode: chr "USA" "USA" "USA" "USA" ...
$ year : int 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 ...
$ rtfpna : num 0.617 0.63 0.638 0.652 0.646 ...
# skimr's skim() expands on summary() with per-type statistics.
skimr::skim(df)
-- Data Summary ------------------------
Values
Name df
Number of rows 186
Number of columns 3
_______________________
Column type frequency:
character 1
numeric 2
________________________
Group variables None
-- Variable type: character ------------------------------------------------------------------------------------------------------------
# A tibble: 1 x 8
skim_variable n_missing complete_rate min max empty n_unique whitespace
* <chr> <int> <dbl> <int> <int> <int> <int> <int>
1 isocode 0 1 3 3 0 3 0
-- Variable type: numeric --------------------------------------------------------------------------------------------------------------
# A tibble: 2 x 11
skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
* <chr> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
1 year 0 1 1980. 17.9 1950 1965 1980. 1996 2011 ▇▇▇▇▇
2 rtfpna 0 1 0.976 0.178 0.617 0.855 0.995 1.05 1.38 ▃▂▇▂▂
Converting data to Time Series:
# install extensible time series library:
# install.packages('xts')
# import a more powerful time-series manipulation library:
# library(xts)
# Slice the combined data frame into one data frame per country.
df_USA <- subset(df, isocode == "USA")
df_CAN <- subset(df, isocode == "CAN")
df_MEX <- subset(df, isocode == "MEX")
# Build an annual time series (starting in 1950) of TFP per country.
ts_usa <- ts(df_USA$rtfpna, start = 1950)
ts_can <- ts(df_CAN$rtfpna, start = 1950)
ts_mex <- ts(df_MEX$rtfpna, start = 1950)
Histogram view of all time series:
# DataExplorer gives quick EDA plots (a better way to see data information):
library(DataExplorer)
package 'DataExplorer' was built under R version 4.0.2
# Histogram of the USA TFP value distribution:
plot_histogram(ts_usa, title = 'USA - RTF histogram')
# Histogram of the CAN TFP value distribution:
plot_histogram(ts_can, title = 'CAN - RTF histogram')
# Histogram of the MEX TFP value distribution:
plot_histogram(ts_mex, title = 'MEX - RTF histogram')
Note:
# Attach plotly for interactive figures.
library("plotly")
# Common margin settings (pixels) shared by every plotly layout below.
m <- list(l = 50, r = 50, b = 100, t = 100, pad = 4)
# X axis: one entry per year of the sample.
ts_years <- seq(from = 1950, to = 2011, by = 1)
# USA: build the line chart and size it in a single pipeline, then display.
fig_usa <- plot_ly(x = ts_years, y = ts_usa, mode = 'lines', type="scatter") %>%
  layout(title = "Time serie - USA", autosize = FALSE, width = 900, height = 500, margin = m)
fig_usa
# CAN: same chart for the Canadian series.
fig_can <- plot_ly(x = ts_years, y = ts_can, mode = 'lines') %>%
  layout(title = "Time serie - CAN", autosize = FALSE, width = 900, height = 500, margin = m)
fig_can
# MEX: same chart for the Mexican series.
fig_mex <- plot_ly(x = ts_years, y = ts_mex, mode = 'lines') %>%
  layout(title = "Time serie - MEX", autosize = FALSE, width = 900, height = 500, margin = m)
fig_mex
Note:
Autocorrelation test:
# Sample autocorrelations by lag for the USA series (printed, not plotted):
acf(x = ts_usa, plot = FALSE)
Autocorrelations of series ‘ts_usa’, by lag
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1.000 0.944 0.890 0.842 0.791 0.734 0.679 0.620 0.559 0.498 0.444 0.391 0.334 0.283 0.236 0.196 0.163 0.136
# Sample autocorrelations by lag for the CAN series (printed, not plotted):
acf(x = ts_can, plot = FALSE)
Autocorrelations of series ‘ts_can’, by lag
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1.000 0.841 0.663 0.545 0.433 0.286 0.217 0.186 0.137 0.103 0.101 0.056 0.011 -0.022 -0.064 -0.087 -0.102 -0.127
# Sample autocorrelations by lag for the MEX series (printed, not plotted):
acf(x = ts_mex, plot = FALSE)
Autocorrelations of series ‘ts_mex’, by lag
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
1.000 0.928 0.855 0.778 0.695 0.621 0.555 0.483 0.413 0.335 0.245 0.167 0.089 0.005 -0.072 -0.128 -0.185 -0.246
Basic seasonal test:
# seastests provides simple seasonality tests:
library(seastests)
# Seasonality test on the USA series (FALSE = no seasonality detected):
isSeasonal(ts_usa)
[1] FALSE
# Seasonality test on the CAN series:
isSeasonal(ts_can)
[1] FALSE
# Seasonality test on the MEX series:
isSeasonal(ts_mex)
[1] FALSE
Visualization of differenced data:
# Years matching the differenced series; diff() drops one observation,
# so the first year is removed from the axis below.
ts_years <- seq(from = 1950, to = 2011, by = 1)
# USA: first differences, plotted and sized in one pipeline.
fig_usa <- plot_ly(x = ts_years[-1], y = diff(ts_usa), mode = 'lines', type="scatter") %>%
  layout(title = "Differenced Time serie - USA", autosize = FALSE, width = 900, height = 500, margin = m)
fig_usa
# CAN: first differences.
fig_can <- plot_ly(x = ts_years[-1], y = diff(ts_can), mode = 'lines') %>%
  layout(title = "Differenced Time serie - CAN", autosize = FALSE, width = 900, height = 500, margin = m)
fig_can
# MEX: first differences.
fig_mex <- plot_ly(x = ts_years[-1], y = diff(ts_mex), mode = 'lines') %>%
  layout(title = "Differenced Time serie - MEX", autosize = FALSE, width = 900, height = 500, margin = m)
fig_mex
Seasonal test with lagged difference:
# seastests provides simple seasonality tests:
library(seastests)
# Seasonality test on the differenced USA series:
isSeasonal(diff(ts_usa))
[1] FALSE
# Seasonality test on the differenced CAN series:
isSeasonal(diff(ts_can))
[1] FALSE
# Seasonality test on the differenced MEX series:
isSeasonal(diff(ts_mex))
[1] FALSE
Stationary test in differenced values:
# Ljung-Box test at lag 1 on the USA series; a tiny p-value indicates
# significant serial correlation (the series is not white noise).
Box.test(x = ts_usa, lag = 1, type = "Ljung-Box")
Box-Ljung test
data: ts_usa
X-squared = 57.965, df = 1, p-value = 2.665e-14
# Ljung-Box test at lag 1 on the CAN series.
Box.test(x = ts_can, lag = 1, type = "Ljung-Box")
Box-Ljung test
data: ts_can
X-squared = 46.055, df = 1, p-value = 1.15e-11
# Ljung-Box test at lag 1 on the MEX series.
Box.test(x = ts_mex, lag = 1, type = "Ljung-Box")
Box-Ljung test
data: ts_mex
X-squared = 56.05, df = 1, p-value = 7.061e-14
Note:
# Hold out the final 10 observations of each series for testing and
# fit on the remainder.
train_usa <- ts_usa[1:(length(ts_usa)-10)]
train_can <- ts_can[1:(length(ts_can)-10)]
train_mex <- ts_mex[1:(length(ts_mex)-10)]
# Test sets: the last 10 observations of EACH country's own series.
# (Bug fix: test_can and test_mex previously sliced ts_usa, so the CAN
# and MEX models were being scored against USA data.)
test_usa <- ts_usa[(length(ts_usa)-9):length(ts_usa)]
test_can <- ts_can[(length(ts_can)-9):length(ts_can)]
test_mex <- ts_mex[(length(ts_mex)-9):length(ts_mex)]
# library for auto.arima() model:
library(forecast)
# Fit an automatically selected ARIMA model to each training series.
arima_usa <- auto.arima( train_usa )
arima_can <- auto.arima( train_can )
arima_mex <- auto.arima( train_mex )
Residuals of each model:
# Residual diagnostics (incl. Ljung-Box) for the USA ARIMA fit:
checkresiduals(arima_usa)
Ljung-Box test
data: Residuals from ARIMA(0,1,0) with drift
Q* = 2.2481, df = 9, p-value = 0.9869
Model df: 1. Total lags used: 10
# Residual diagnostics for the CAN ARIMA fit:
checkresiduals(arima_can)
Ljung-Box test
data: Residuals from ARIMA(2,1,2)
Q* = 0.62882, df = 6, p-value = 0.9959
Model df: 4. Total lags used: 10
# Residual diagnostics for the MEX ARIMA fit:
checkresiduals(arima_mex)
Ljung-Box test
data: Residuals from ARIMA(0,2,1)
Q* = 9.9653, df = 9, p-value = 0.3533
Model df: 1. Total lags used: 10
Note:
Forecasting:
# Produce 10-step-ahead forecasts from each fitted ARIMA model.
forecast_usa <- forecast(arima_usa, h = 10)
forecast_can <- forecast(arima_can, h = 10)
forecast_mex <- forecast(arima_mex, h = 10)
Model accuracy measurement:
# Accuracy of the USA ARIMA forecasts on the training and test sets:
accuracy(forecast_usa, test_usa)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 1.173976e-05 0.01142138 0.009289059 -0.02886767 1.229499 0.8085699 0.01745848
Test set 1.492144e-03 0.01198174 0.010157654 0.14951655 1.019562 0.8841771 NA
# Accuracy of the CAN ARIMA forecasts on the training and test sets:
accuracy(forecast_can, test_can)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.003365734 0.01685372 0.01251965 0.3440134 1.277757 0.8140266 -0.01783164
Test set -0.047857735 0.05003667 0.04785774 -4.8278925 4.827892 3.1117049 NA
# Accuracy of the MEX ARIMA forecasts on the training and test sets:
accuracy(forecast_mex, test_mex)
ME RMSE MAE MPE MAPE MASE ACF1
Training set -0.005401946 0.03393699 0.02729705 -0.4490029 2.391014 0.9854180 0.03406867
Test set -0.007081123 0.03178973 0.02377751 -0.7559812 2.401825 0.8583634 NA
Note:
# USA forecast plot:
plot(forecast_usa)
# CAN forecast plot:
plot(forecast_can)
# MEX forecast plot:
plot(forecast_mex)
Model fitting with seasonal component:
# Refit each model forcing one seasonal difference (D = 1).
# NOTE(review): these annual series have frequency 1, so the seasonal
# difference has no effect — the results below match the earlier fits.
arima_usa <- auto.arima(train_usa, D = 1)
arima_can <- auto.arima(train_can, D = 1)
arima_mex <- auto.arima(train_mex, D = 1)
# 10-step-ahead forecasts from the refitted models.
forecast_usa <- forecast(arima_usa, h = 10)
forecast_can <- forecast(arima_can, h = 10)
forecast_mex <- forecast(arima_mex, h = 10)
# Residual diagnostics for the USA (D = 1) ARIMA fit:
checkresiduals(arima_usa)
Ljung-Box test
data: Residuals from ARIMA(0,1,0) with drift
Q* = 2.2481, df = 9, p-value = 0.9869
Model df: 1. Total lags used: 10
# Residual diagnostics for the CAN (D = 1) ARIMA fit:
checkresiduals(arima_can)
Ljung-Box test
data: Residuals from ARIMA(2,1,2)
Q* = 0.62882, df = 6, p-value = 0.9959
Model df: 4. Total lags used: 10
# Residual diagnostics for the MEX (D = 1) ARIMA fit:
checkresiduals(arima_mex)
Ljung-Box test
data: Residuals from ARIMA(0,2,1)
Q* = 9.9653, df = 9, p-value = 0.3533
Model df: 1. Total lags used: 10
# Accuracy of the USA (D = 1) forecasts on the training and test sets:
accuracy(forecast_usa, test_usa)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 1.173976e-05 0.01142138 0.009289059 -0.02886767 1.229499 0.8085699 0.01745848
Test set 1.492144e-03 0.01198174 0.010157654 0.14951655 1.019562 0.8841771 NA
# Accuracy of the CAN (D = 1) forecasts on the training and test sets:
accuracy(forecast_can, test_can)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.003365734 0.01685372 0.01251965 0.3440134 1.277757 0.8140266 -0.01783164
Test set -0.047857735 0.05003667 0.04785774 -4.8278925 4.827892 3.1117049 NA
# Accuracy of the MEX (D = 1) forecasts on the training and test sets:
accuracy(forecast_mex, test_mex)
ME RMSE MAE MPE MAPE MASE ACF1
Training set -0.005401946 0.03393699 0.02729705 -0.4490029 2.391014 0.9854180 0.03406867
Test set -0.007081123 0.03178973 0.02377751 -0.7559812 2.401825 0.8583634 NA
Note:
Model fitting with logarithmic transformation:
# Fit ARIMA on the log scale via a Box-Cox transformation (lambda = 0).
# (Fix: the models were previously fit to log(train_*) directly, so their
# forecasts stayed on the log scale while accuracy() compared them against
# the raw test values — producing the ~99% MAPE seen below. With
# lambda = 0 the model is estimated on the log scale but forecasts are
# back-transformed to the original scale automatically.)
# usa model fit:
arima_usa <- auto.arima(train_usa, lambda = 0)
# can model fit:
arima_can <- auto.arima(train_can, lambda = 0)
# mex model fit:
arima_mex <- auto.arima(train_mex, lambda = 0)
# usa model forecasting:
forecast_usa <- forecast(arima_usa, h = 10)
# can model forecasting:
forecast_can <- forecast(arima_can, h = 10)
# mex model forecasting:
forecast_mex <- forecast(arima_mex, h = 10)
# Residual diagnostics for the USA log-scale ARIMA fit:
checkresiduals(arima_usa)
Ljung-Box test
data: Residuals from ARIMA(0,1,0) with drift
Q* = 2.7402, df = 9, p-value = 0.9737
Model df: 1. Total lags used: 10
# Residual diagnostics for the CAN log-scale ARIMA fit:
checkresiduals(arima_can)
Ljung-Box test
data: Residuals from ARIMA(2,1,2)
Q* = 0.47696, df = 6, p-value = 0.9981
Model df: 4. Total lags used: 10
# Residual diagnostics for the MEX log-scale ARIMA fit:
checkresiduals(arima_mex)
Ljung-Box test
data: Residuals from ARIMA(0,2,1)
Q* = 11.41, df = 9, p-value = 0.2487
Model df: 1. Total lags used: 10
# Accuracy of the USA log-scale forecasts; NOTE(review): the huge test-set
# errors below come from comparing log-scale forecasts to raw test values.
accuracy(forecast_usa, test_usa)
ME RMSE MAE MPE MAPE MASE ACF1
Training set -9.447407e-06 0.01520765 0.01226446 -0.8293685 5.779819 0.8112803 -0.04697332
Test set 9.917439e-01 0.99187127 0.99174386 99.5969003 99.596900 65.6027701 NA
# Accuracy of the CAN log-scale forecasts (same scale-mismatch caveat):
accuracy(forecast_can, test_can)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.003533582 0.01750676 0.01287399 -66.40604 124.05883 0.8164014 -0.02693457
Test set 0.953761012 0.95387301 0.95376101 95.75424 95.75424 60.4825395 NA
# Accuracy of the MEX log-scale forecasts (same scale-mismatch caveat):
accuracy(forecast_mex, test_mex)
ME RMSE MAE MPE MAPE MASE ACF1
Training set -0.00593543 0.03030626 0.02397011 -23.94604 46.30024 0.9693792 0.01356029
Test set 0.99018494 0.99061860 0.99018494 99.39117 99.39117 40.0442378 NA
Note:
# usa model fit:
There were 21 warnings (use warnings() to see them)
# Fit a TBATS model to each country's training series.
tbats_usa <- tbats(train_usa)
tbats_can <- tbats(train_can)
tbats_mex <- tbats(train_mex)
# Residual diagnostics for the USA TBATS fit:
checkresiduals( tbats_usa )
Ljung-Box test
data: Residuals from BATS
Q* = 1.8962, df = 6, p-value = 0.929
Model df: 4. Total lags used: 10
# Residual diagnostics for the CAN TBATS fit:
checkresiduals( tbats_can )
Ljung-Box test
data: Residuals from BATS
Q* = 20.145, df = 5, p-value = 0.001174
Model df: 5. Total lags used: 10
# Residual diagnostics for the MEX TBATS fit:
checkresiduals( tbats_mex )
Ljung-Box test
data: Residuals from BATS
Q* = 9.4496, df = 5, p-value = 0.09242
Model df: 5. Total lags used: 10
# Produce 10-step-ahead forecasts from each TBATS fit.
forecast_usa <- forecast(tbats_usa, h = 10)
forecast_can <- forecast(tbats_can, h = 10)
forecast_mex <- forecast(tbats_mex, h = 10)
# Accuracy of the USA TBATS forecasts on the training and test sets:
accuracy(forecast_usa, test_usa)
ME RMSE MAE MPE MAPE MASE ACF1
Training set -0.001516823 0.01168741 0.009313908 -0.2142319 1.246589 0.810733 -0.01699187
Test set -0.010577272 0.01976114 0.014115951 -1.0545468 1.410238 1.228729 NA
# Accuracy of the CAN TBATS forecasts on the training and test sets:
accuracy(forecast_can, test_can)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.001362652 0.01785827 0.01345327 0.1265779 1.384371 0.8747299 -0.1596669
Test set -0.039026975 0.04291561 0.03902698 -3.9465506 3.946551 2.5375298 NA
# Accuracy of the MEX TBATS forecasts on the training and test sets:
accuracy(forecast_mex, test_mex)
ME RMSE MAE MPE MAPE MASE ACF1
Training set -0.002294781 0.03284648 0.02607941 -0.1670161 2.306966 0.9414616 -0.01682975
Test set -0.021383052 0.03014032 0.02397937 -2.1795294 2.433998 0.8656506 NA
#### Predictive model - simple exponential smoothing:
# Fit simple exponential smoothing with a 10-step forecast horizon.
ss_usa <- ses(train_usa, h = 10)
ss_can <- ses(train_can, h = 10)
ss_mex <- ses(train_mex, h = 10)
# Residual diagnostics for the USA SES fit:
checkresiduals( ss_usa )
Ljung-Box test
data: Residuals from Simple exponential smoothing
Q* = 2.6291, df = 8, p-value = 0.9554
Model df: 2. Total lags used: 10
# Residual diagnostics for the CAN SES fit:
checkresiduals( ss_can )
Ljung-Box test
data: Residuals from Simple exponential smoothing
Q* = 12.394, df = 8, p-value = 0.1345
Model df: 2. Total lags used: 10
# Residual diagnostics for the MEX SES fit:
checkresiduals( ss_mex )
Ljung-Box test
data: Residuals from Simple exponential smoothing
Q* = 26.754, df = 8, p-value = 0.0007796
Model df: 2. Total lags used: 10
# Accuracy of the USA SES forecasts on the training and test sets:
accuracy(ss_usa, test_usa)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.006552439 0.0131990 0.01126788 0.8303007 1.473549 0.9808175 0.01147509
Test set 0.038231707 0.0411608 0.03823171 3.8156039 3.815604 3.3278946 NA
# Accuracy of the CAN SES forecasts on the training and test sets:
accuracy(ss_can, test_can)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.003981085 0.01942576 0.01508497 0.4014974 1.539588 0.980823 0.1345404
Test set -0.054298409 0.05639916 0.05429841 -5.4762281 5.476228 3.530477 NA
# Accuracy of the MEX SES forecasts on the training and test sets:
accuracy(ss_mex, test_mex)
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.004587987 0.03669816 0.02716980 0.4476004 2.411745 0.9808245 0.2596731
Test set -0.040681850 0.04344607 0.04068185 -4.1088595 4.108859 1.4686068 NA
| Model | USA | CAN | MEX |
|---|---|---|---|
| ARIMA | 1.01 | 4.8 | 2.4 |
| TBATS | 1.04 | 3.9 | 2.4 |
| SES | 3.81 | 5.47 | 4.1 |
# variables to adjust models:
There were 21 warnings (use warnings() to see them)
# Refit on the FULL series (no hold-out) for the final forecasts.
# seq_along() replaces the 1:length(x) anti-pattern (which misbehaves for
# zero-length input); subsetting keeps the same plain-vector form that the
# earlier training sets used.
train_usa <- ts_usa[seq_along(ts_usa)]
train_can <- ts_can[seq_along(ts_can)]
train_mex <- ts_mex[seq_along(ts_mex)]
# Final models: the best performer per country from the comparison table
# above (ARIMA for USA and MEX, TBATS for CAN).
final_arima_usa <- auto.arima(train_usa)
final_tbats_can <- tbats(train_can)
final_arima_mex <- auto.arima(train_mex)
# 10-step-ahead forecasts from the final models.
final_forecast_usa <- forecast(final_arima_usa, h = 10)
final_forecast_can <- forecast(final_tbats_can, h = 10)
final_forecast_mex <- forecast(final_arima_mex, h = 10)
# Plot each country's final forecast:
plot(final_forecast_usa)
plot(final_forecast_can)
plot(final_forecast_mex)
According to Wikipedia, the main criticism of TFP is the word “total”, as it suggests that all inputs should be included; yet some inputs, such as energy, are never included.
I suggest including the mean level of household energy consumption. I do not know how to get this data, but with the recent popularization of the open data culture around the world, it is possible that open data portals like www.data.gov (USA), open.canada.ca (CAN) and developmentseed.org (MEX) would be good starting points.